/*
 * Copyright (C) 2019-2020 Yaong <yaongtime@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/sysinfo.h>

#include "u_math.h"

#include "vc4_private.h"
#include "vk_util.h"
#include "vc4_memory.h"

#include <xf86drm.h>
#include "vc4_drm.h"

/* Debug-flag plumbing carried over from the vc4 Gallium driver.  The whole
 * option table and the DEBUG_GET_ONCE_FLAGS_OPTION parser are compiled out
 * (#if 0), so vc4_debug below is currently always zero. */
#if 0
static const struct debug_named_value debug_options[] = {
    {"cl", VC4_DEBUG_CL,
     "Dump command list during creation"},
    {"surf", VC4_DEBUG_SURFACE,
     "Dump surface layouts"},
    {"qpu", VC4_DEBUG_QPU,
     "Dump generated QPU instructions"},
    {"qir", VC4_DEBUG_QIR,
     "Dump QPU IR during program compile"},
    {"nir", VC4_DEBUG_NIR,
     "Dump NIR during program compile"},
    {"tgsi", VC4_DEBUG_TGSI,
     "Dump TGSI during program compile"},
    {"shaderdb", VC4_DEBUG_SHADERDB,
     "Dump program compile information for shader-db analysis"},
    {"perf", VC4_DEBUG_PERF,
     "Print during performance-related events"},
    {"norast", VC4_DEBUG_NORAST,
     "Skip actual hardware execution of commands"},
    {"always_flush", VC4_DEBUG_ALWAYS_FLUSH,
     "Flush after each draw call"},
    {"always_sync", VC4_DEBUG_ALWAYS_SYNC,
     "Wait for finish after each flush"},
#ifdef USE_VC4_SIMULATOR
    {"dump", VC4_DEBUG_DUMP,
     "Write a GPU command stream trace file"},
#endif
    {NULL}};

DEBUG_GET_ONCE_FLAGS_OPTION(vc4_debug, "VC4_DEBUG", debug_options, 0)
#endif
/* Global bitmask of VC4_DEBUG_* flags; stays 0 until the parser above is
 * re-enabled. */
uint32_t vc4_debug = 0;

/* Fallback VkAllocationCallbacks allocation hook: plain malloc.
 * NOTE(review): align and allocationScope are ignored — assumes malloc's
 * natural alignment is sufficient for every caller; confirm. */
static VKAPI_ATTR void *
default_alloc_func(void *pUserData,
                   size_t size,
                   size_t align,
                   VkSystemAllocationScope allocationScope)
{
    (void)pUserData;
    (void)align;
    (void)allocationScope;
    return malloc(size);
}

/* Fallback VkAllocationCallbacks reallocation hook: plain realloc.
 * NOTE(review): align and allocationScope are ignored, same caveat as
 * default_alloc_func. */
static VKAPI_ATTR void *
default_realloc_func(void *pUserData,
                     void *pOriginal,
                     size_t size,
                     size_t align,
                     VkSystemAllocationScope allocationScope)
{
    (void)pUserData;
    (void)align;
    (void)allocationScope;
    return realloc(pOriginal, size);
}

/* Fallback VkAllocationCallbacks free hook: plain free (NULL is a no-op). */
static VKAPI_ATTR void
default_free_func(void *pUserData, void *pMemory)
{
    (void)pUserData;
    free(pMemory);
}

/* Allocator used whenever the application passes pAllocator == NULL;
 * backed by the malloc/realloc/free hooks above. */
static const VkAllocationCallbacks default_alloc = {
    .pUserData = NULL,
    .pfnAllocation = default_alloc_func,
    .pfnReallocation = default_realloc_func,
    .pfnFree = default_free_func,
};

/* Look up an instance extension by name in the generated extension table.
 * Returns the table index, or -1 when the name is not known. */
static int
vc4_get_instance_extension_index(const char *name)
{
   int found = -1;

   for (unsigned i = 0; found < 0 && i < VC4_INSTANCE_EXTENSION_COUNT; ++i) {
      if (!strcmp(vc4_instance_extensions[i].extensionName, name))
         found = (int)i;
   }

   return found;
}

/* vkCreateInstance: allocate and initialize a vc4_instance, validating the
 * requested instance extensions.  Returns VK_ERROR_EXTENSION_NOT_PRESENT
 * for unknown/unsupported extensions and VK_ERROR_OUT_OF_HOST_MEMORY on
 * allocation failure. */
VKAPI_ATTR VkResult vc4_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
{
    struct vc4_instance *instance;

    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

    /* Use the application's requested API version when given, otherwise
     * fall back to the driver's highest supported instance version. */
    uint32_t client_version;
    if (pCreateInfo->pApplicationInfo &&
        pCreateInfo->pApplicationInfo->apiVersion != 0) {
        client_version = pCreateInfo->pApplicationInfo->apiVersion;
    }
    else {
        vc4_EnumerateInstanceVersion(&client_version);
    }

    instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    if (!instance)
        return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(NULL, &instance->base, VK_OBJECT_TYPE_INSTANCE);

    instance->api_version = client_version;
    /* -1 means "not enumerated yet"; vc4_EnumeratePhysicalDevices checks
     * this sentinel before probing the DRM node. */
    instance->physical_device_count = -1;

    if (pAllocator)
        instance->alloc = *pAllocator;
    else
        instance->alloc = default_alloc;

    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
        const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
        int index = vc4_get_instance_extension_index(ext_name);

        if (index < 0 || !vc4_instance_extensions_supported.extensions[index]) {
            /* Record the error while the instance is still alive, THEN free
             * it.  The previous code freed first and passed the dangling
             * pointer to vk_error() — a use-after-free. */
            VkResult err = vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
            vk_free2(&default_alloc, pAllocator, instance);
            return err;
        }

        instance->enabled_extensions.extensions[index] = true;
    }

    /* Balanced by glsl_type_singleton_decref() in vc4_DestroyInstance. */
    glsl_type_singleton_init_or_ref();

    *pInstance = vc4_instance_to_handle(instance);

    return VK_SUCCESS;
}

/* vkDestroyInstance: tear down an instance created by vc4_CreateInstance.
 * pAllocator is unused because the instance remembers the allocator it was
 * created with (instance->alloc). */
void vc4_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
    VC4_FROM_HANDLE(vc4_instance, instance, _instance);

    /* The spec allows destroying VK_NULL_HANDLE. */
    if (!instance)
        return;

    /* Drops the reference taken by glsl_type_singleton_init_or_ref() in
     * vc4_CreateInstance. */
    glsl_type_singleton_decref();

    vk_object_base_finish(&instance->base);

    vk_free(&instance->alloc, instance);
}

/* vkEnumerateInstanceExtensionProperties: report every supported instance
 * extension.  No layers are implemented, so a non-NULL pLayerName always
 * yields VK_ERROR_LAYER_NOT_PRESENT. */
VkResult
vc4_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                        uint32_t *pPropertyCount,
                                        VkExtensionProperties *pProperties)
{
   VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);

   /* We support no layers. */
   if (pLayerName != NULL)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   for (unsigned i = 0; i < VC4_INSTANCE_EXTENSION_COUNT; i++) {
      if (!vc4_instance_extensions_supported.extensions[i])
         continue;
      vk_outarray_append(&out, prop) {
         *prop = vc4_instance_extensions[i];
      }
   }

   return vk_outarray_status(&out);
}

/* vkEnumerateInstanceLayerProperties: this driver exposes no layers, so
 * always report zero properties. */
VkResult
vc4_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                    VkLayerProperties *pProperties)
{
   (void)pProperties;
   *pPropertyCount = 0;
   return VK_SUCCESS;
}

/* Probe the (single, hard-coded) VC4 DRM node and populate
 * instance->physical_devices[0].  Returns VK_ERROR_INCOMPATIBLE_DRIVER
 * when the node cannot be opened. */
static VkResult
vc4_enumerate_devices(struct vc4_instance *instance)
{
    //TODO: ADD MORE DEVICE INFORMATION
    struct vc4_physical_device *device = &instance->physical_devices[0];

    device->instance = instance;
    /* NOTE(review): the buffer bound 20 is hard-coded — confirm it matches
     * the declared size of device->path. */
    snprintf(device->path, 20, "%s", "/dev/dri/card0");

    device->master_fd = open(device->path, O_RDWR | O_CLOEXEC);
    if (device->master_fd < 0)
        return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "failed to open device %s", device->path);

    set_loader_magic_value(device);
    device->local_fd = device->master_fd;
    instance->physical_device_count = 1;

    vc4_wsi_init(device);

    vc4_physical_device_get_supported_extensions(device,
                                                 &device->supported_extensions);

    return VK_SUCCESS;
}

/* vkEnumeratePhysicalDevices: lazily probe the DRM node on first call,
 * then report the (at most one) physical device. */
VkResult
vc4_EnumeratePhysicalDevices(VkInstance _instance,
                             uint32_t *pPhysicalDeviceCount,
                             VkPhysicalDevice *pPhysicalDevices)
{
    VC4_FROM_HANDLE(vc4_instance, instance, _instance);
    VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);

    VkResult result;

    /* physical_device_count == -1 marks "not yet enumerated". */
    if (instance->physical_device_count < 0)
    {
        result = vc4_enumerate_devices(instance);
        /* INCOMPATIBLE_DRIVER just means "no device found": fall through
         * and report zero devices instead of failing the call. */
        if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
            return result;
    }

    /* Use a signed index: if enumeration failed above, the count is still
     * -1, and the old `uint32_t i < count` compare promoted -1 to
     * UINT32_MAX, looping over garbage devices. */
    for (int i = 0; i < instance->physical_device_count; ++i)
    {
        vk_outarray_append(&out, p)
        {
            *p = vc4_physical_device_to_handle(instance->physical_devices + i);
        }
    }

    return vk_outarray_status(&out);
}

/* vkGetPhysicalDeviceFeatures: report the fixed feature set of the VC4 GPU.
 * The whole-struct assignment below zero-initializes every member not
 * explicitly listed, so the previous memset() was dead code and has been
 * removed. */
void vc4_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
                                   VkPhysicalDeviceFeatures *pFeatures)
{
    (void)physicalDevice; /* features do not vary per device */

    *pFeatures = (VkPhysicalDeviceFeatures){
        .robustBufferAccess = true,
        .fullDrawIndexUint32 = true,
        .imageCubeArray = true,
        .independentBlend = true,
        .geometryShader = false,
        .tessellationShader = false,
        .sampleRateShading = true,
        .dualSrcBlend = true,
        .logicOp = true,
        .multiDrawIndirect = true,
        .drawIndirectFirstInstance = true,
        .depthClamp = true,
        .depthBiasClamp = true,
        .fillModeNonSolid = true,
        .depthBounds = true,
        .wideLines = false,
        .largePoints = true,
        .alphaToOne = true,
        .multiViewport = false,
        .samplerAnisotropy = true,
        .textureCompressionETC2 = true,
        .textureCompressionASTC_LDR = true,
        .textureCompressionBC = true,
        .occlusionQueryPrecise = true,
        .pipelineStatisticsQuery = false,
        .vertexPipelineStoresAndAtomics = false,
        .fragmentStoresAndAtomics = false,
        .shaderTessellationAndGeometryPointSize = false,
        .shaderImageGatherExtended = false,
        .shaderStorageImageExtendedFormats = false,
        .shaderStorageImageMultisample = false,
        .shaderUniformBufferArrayDynamicIndexing = false,
        .shaderSampledImageArrayDynamicIndexing = false,
        .shaderStorageBufferArrayDynamicIndexing = false,
        .shaderStorageImageArrayDynamicIndexing = false,
        .shaderStorageImageReadWithoutFormat = false,
        .shaderStorageImageWriteWithoutFormat = false,
        .shaderClipDistance = false,
        .shaderCullDistance = false,
        .shaderFloat64 = false,
        .shaderInt64 = false,
        .shaderInt16 = false,
        .sparseBinding = false,
        .variableMultisampleRate = false,
        .inheritedQueries = false,
    };
}

/* vkGetPhysicalDeviceProperties: report API version, IDs, and the (mostly
 * placeholder) device limits.  Fix: deviceName is a fixed-size array in
 * VkPhysicalDeviceProperties, so the unbounded strcpy() is replaced with a
 * bounded snprintf(). */
void vc4_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
                                     VkPhysicalDeviceProperties *pProperties)
{
    VC4_FROM_HANDLE(vc4_physical_device, pdevice, physicalDevice);
    VkSampleCountFlags sample_counts =
        VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

    /* NOTE(review): this comment block (and several limits below) was
     * copied from another driver — CP_LOAD_STATE6 is not VC4 hardware.
     * The values need a real audit against the VC4 spec. */
    const size_t max_descriptor_set_size = (1 << 28);

    //TODO: NEED REWTIRE LATER
    VkPhysicalDeviceLimits limits = {
        .maxImageDimension1D = (1 << 14),
        .maxImageDimension2D = (1 << 14),
        .maxImageDimension3D = (1 << 11),
        .maxImageDimensionCube = (1 << 14),
        .maxImageArrayLayers = (1 << 11),
        .maxTexelBufferElements = 128 * 1024 * 1024,
        //   .maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE,
        //   .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
        //   .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
        //   .maxMemoryAllocationCount = UINT32_MAX,
        .maxSamplerAllocationCount = 64 * 1024,
        .bufferImageGranularity = 64,          /* A cache line */
        .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
                                               //   .maxBoundDescriptorSets = MAX_SETS,
        .maxPerStageDescriptorSamplers = max_descriptor_set_size,
        .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
        .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
        .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
        .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
        //   .maxPerStageDescriptorInputAttachments = MAX_RTS,
        .maxPerStageResources = max_descriptor_set_size,
        .maxDescriptorSetSamplers = max_descriptor_set_size,
        .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
        //   .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
        .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
        //   .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
        .maxDescriptorSetSampledImages = max_descriptor_set_size,
        .maxDescriptorSetStorageImages = max_descriptor_set_size,
        //   .maxDescriptorSetInputAttachments = MAX_RTS,
        .maxVertexInputAttributes = 32,
        .maxVertexInputBindings = 32,
        .maxVertexInputAttributeOffset = 4095,
        .maxVertexInputBindingStride = 2048,
        .maxVertexOutputComponents = 128,
        .maxTessellationGenerationLevel = 64,
        .maxTessellationPatchSize = 32,
        .maxTessellationControlPerVertexInputComponents = 128,
        .maxTessellationControlPerVertexOutputComponents = 128,
        .maxTessellationControlPerPatchOutputComponents = 120,
        .maxTessellationControlTotalOutputComponents = 4096,
        .maxTessellationEvaluationInputComponents = 128,
        .maxTessellationEvaluationOutputComponents = 128,
        .maxGeometryShaderInvocations = 32,
        .maxGeometryInputComponents = 64,
        .maxGeometryOutputComponents = 128,
        .maxGeometryOutputVertices = 256,
        .maxGeometryTotalOutputComponents = 1024,
        .maxFragmentInputComponents = 124,
        .maxFragmentOutputAttachments = 8,
        .maxFragmentDualSrcAttachments = 1,
        .maxFragmentCombinedOutputResources = 8,
        .maxComputeSharedMemorySize = 32768,
        .maxComputeWorkGroupCount = {65535, 65535, 65535},
        .maxComputeWorkGroupInvocations = 2048,
        .maxComputeWorkGroupSize = {2048, 2048, 2048},
        .subPixelPrecisionBits = 8,
        .subTexelPrecisionBits = 8,
        .mipmapPrecisionBits = 8,
        //   .maxDrawIndexedIndexValue = UINT32_MAX,
        //   .maxDrawIndirectCount = UINT32_MAX,
        .maxSamplerLodBias = 4095.0 / 256.0, /* [-16, 15.99609375] */
        .maxSamplerAnisotropy = 16,
        //   .maxViewports = MAX_VIEWPORTS,
        .maxViewportDimensions = {(1 << 14), (1 << 14)},
        //   .viewportBoundsRange = { INT16_MIN, INT16_MAX },
        .viewportSubPixelBits = 8,
        .minMemoryMapAlignment = 4096, /* A page */
        .minTexelBufferOffsetAlignment = 64,
        .minUniformBufferOffsetAlignment = 64,
        .minStorageBufferOffsetAlignment = 64,
        .minTexelOffset = -16,
        .maxTexelOffset = 15,
        .minTexelGatherOffset = -32,
        .maxTexelGatherOffset = 31,
        .minInterpolationOffset = -0.5,
        .maxInterpolationOffset = 0.4375,
        .subPixelInterpolationOffsetBits = 4,
        .maxFramebufferWidth = (1 << 14),
        .maxFramebufferHeight = (1 << 14),
        .maxFramebufferLayers = (1 << 10),
        .framebufferColorSampleCounts = sample_counts,
        .framebufferDepthSampleCounts = sample_counts,
        .framebufferStencilSampleCounts = sample_counts,
        .framebufferNoAttachmentsSampleCounts = sample_counts,
        //   .maxColorAttachments = MAX_RTS,
        .sampledImageColorSampleCounts = sample_counts,
        .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
        .sampledImageDepthSampleCounts = sample_counts,
        .sampledImageStencilSampleCounts = sample_counts,
        .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
        .maxSampleMaskWords = 1,
        .timestampComputeAndGraphics = true,
        .timestampPeriod = 1000000000.0 / 19200000.0, /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
        .maxClipDistances = 8,
        .maxCullDistances = 8,
        .maxCombinedClipAndCullDistances = 8,
        .discreteQueuePriorities = 1,
        .pointSizeRange = {1, 4092},
        .lineWidthRange = {0.0, 7.9921875},
        .pointSizeGranularity = 0.0625,
        .lineWidthGranularity = (1.0 / 128.0),
        .strictLines = false, /* FINISHME */
        .standardSampleLocations = true,
        .optimalBufferCopyOffsetAlignment = 128,
        .optimalBufferCopyRowPitchAlignment = 128,
        .nonCoherentAtomSize = 64,
    };

    *pProperties = (VkPhysicalDeviceProperties){
        .apiVersion = vc4_physical_device_api_version(pdevice),
        .driverVersion = vk_get_driver_version(),
        .vendorID = 0x55, /* TODO */
        .deviceID = 0xaa,
        .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
        .limits = limits,
        .sparseProperties = {0},
    };

    /* Bounded copy: deviceName is char[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
     * snprintf always NUL-terminates, unlike the old strcpy. */
    snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
             "%s", pdevice->name);
    memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

/* vkGetPhysicalDeviceProperties2: fill the core properties via the
 * non-"2" entry point.  The pNext chain is currently ignored; the
 * commented-out switch below is a template (copied from another driver —
 * note the IR3_* constants) for handling extension property structs later.
 * pdevice is unused until that code is enabled. */
void
vc4_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceProperties2 *pProperties)
{
   VC4_FROM_HANDLE(vc4_physical_device, pdevice, physicalDevice);
   vc4_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   // vk_foreach_struct(ext, pProperties->pNext)
   // {
   //    switch (ext->sType) {
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
   //       VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
   //          (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
   //       properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
   //       VkPhysicalDeviceIDProperties *properties =
   //          (VkPhysicalDeviceIDProperties *) ext;
   //       memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   //       memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   //       properties->deviceLUIDValid = false;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
   //       VkPhysicalDeviceMultiviewProperties *properties =
   //          (VkPhysicalDeviceMultiviewProperties *) ext;
   //       properties->maxMultiviewViewCount = MAX_VIEWS;
   //       properties->maxMultiviewInstanceIndex = INT_MAX;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
   //       VkPhysicalDevicePointClippingProperties *properties =
   //          (VkPhysicalDevicePointClippingProperties *) ext;
   //       properties->pointClippingBehavior =
   //          VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
   //       VkPhysicalDeviceMaintenance3Properties *properties =
   //          (VkPhysicalDeviceMaintenance3Properties *) ext;
   //       /* Make sure everything is addressable by a signed 32-bit int, and
   //        * our largest descriptors are 96 bytes. */
   //       properties->maxPerSetDescriptors = (1ull << 31) / 96;
   //       /* Our buffer size fields allow only this much */
   //       properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
   //       VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
   //          (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;

   //       properties->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
   //       properties->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
   //       properties->maxTransformFeedbackBufferSize = UINT32_MAX;
   //       properties->maxTransformFeedbackStreamDataSize = 512;
   //       properties->maxTransformFeedbackBufferDataSize = 512;
   //       properties->maxTransformFeedbackBufferDataStride = 512;
   //       properties->transformFeedbackQueries = true;
   //       properties->transformFeedbackStreamsLinesTriangles = false;
   //       properties->transformFeedbackRasterizationStreamSelect = false;
   //       properties->transformFeedbackDraw = true;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
   //       VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
   //          (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
   //       properties->sampleLocationSampleCounts = 0;
   //       if (pdevice->supported_extensions.EXT_sample_locations) {
   //          properties->sampleLocationSampleCounts =
   //             VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
   //       }
   //       properties->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
   //       properties->sampleLocationCoordinateRange[0] = 0.0f;
   //       properties->sampleLocationCoordinateRange[1] = 0.9375f;
   //       properties->sampleLocationSubPixelBits = 4;
   //       properties->variableSampleLocations = true;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
   //       VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
   //          (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
   //       properties->filterMinmaxImageComponentMapping = true;
   //       properties->filterMinmaxSingleComponentFormats = true;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
   //       VkPhysicalDeviceSubgroupProperties *properties =
   //          (VkPhysicalDeviceSubgroupProperties *)ext;
   //       properties->subgroupSize = 64;
   //       properties->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   //       properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
   //                                         VK_SUBGROUP_FEATURE_VOTE_BIT;
   //       properties->quadOperationsInAllStages = false;
   //       break;
   //    }
   //    case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
   //       VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
   //          (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
   //       props->maxVertexAttribDivisor = UINT32_MAX;
   //       break;
   //    }
   //    default:
   //       break;
   //    }
   // }
}

/* vkEnumerateDeviceExtensionProperties: report the device extensions this
 * physical device supports.  No layers are implemented, so a non-NULL
 * pLayerName always yields VK_ERROR_LAYER_NOT_PRESENT. */
VkResult
vc4_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice,
                                      const char *pLayerName,
                                      uint32_t *pPropertyCount,
                                      VkExtensionProperties *pProperties)
{
   VC4_FROM_HANDLE(vc4_physical_device, device, physicalDevice);
   VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);

   /* We support no layers. */
   if (pLayerName != NULL)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   for (unsigned i = 0; i < VC4_DEVICE_EXTENSION_COUNT; i++) {
      if (!device->supported_extensions.extensions[i])
         continue;
      vk_outarray_append(&out, prop) {
         *prop = vc4_device_extensions[i];
      }
   }

   return vk_outarray_status(&out);
}

/* vkGetDeviceQueue2: fetch the queue addressed by (family, index).
 *
 * From the Vulkan 1.1.70 spec: "The queue returned by vkGetDeviceQueue2
 * must have the same flags value from this structure as that used at
 * device creation time in a VkDeviceQueueCreateInfo instance.  If no
 * matching flags were specified at device creation time then pQueue will
 * return VK_NULL_HANDLE." */
void
vc4_GetDeviceQueue2(VkDevice _device,
                   const VkDeviceQueueInfo2 *pQueueInfo,
                   VkQueue *pQueue)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   struct vc4_queue *queue =
      &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];

   *pQueue = (pQueueInfo->flags == queue->flags)
                ? vc4_queue_to_handle(queue)
                : VK_NULL_HANDLE;
}

/* Size the single Vulkan memory heap from system RAM.
 *
 * We don't want to burn too much ram with the GPU.  If the user has 4GiB
 * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
 *
 * Fixes: prototype is now (void) instead of the unprototyped (), and the
 * sysinfo() return value is checked — the old code read an uninitialized
 * struct on failure (undefined behavior). */
static uint64_t
vc4_get_system_heap_size(void)
{
   struct sysinfo info;
   if (sysinfo(&info) != 0) {
      /* Conservative 1 GiB fallback if the kernel query fails. */
      return 1024ull * 1024ull * 1024ull;
   }

   uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit;

   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   return available_ram;
}

/* vkGetPhysicalDeviceMemoryProperties: a single heap sized from system RAM
 * (VC4 is a UMA device) with one memory type that is device-local,
 * host-visible, and host-coherent. */
void
vc4_GetPhysicalDeviceMemoryProperties(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
    pMemoryProperties->memoryHeapCount = 1;
    pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
        .size = vc4_get_system_heap_size(),
        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };

    pMemoryProperties->memoryTypeCount = 1;
    pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        .heapIndex = 0,
    };
}

/* vkGetDeviceQueue: legacy entry point — forward to the flags-aware
 * vkGetDeviceQueue2 path with zero flags. */
void
vc4_GetDeviceQueue(VkDevice _device,
                  uint32_t queueFamilyIndex,
                  uint32_t queueIndex,
                  VkQueue *pQueue)
{
   const VkDeviceQueueInfo2 info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
      .queueFamilyIndex = queueFamilyIndex,
      .queueIndex = queueIndex,
   };

   vc4_GetDeviceQueue2(_device, &info, pQueue);
}

/* Initialize one vc4_queue: stamp the loader magic so the ICD loader
 * recognizes the dispatchable object, then record the queue's identity.
 * Always returns VK_SUCCESS (kept as VkResult for the caller's error
 * plumbing). */
static VkResult
vc4_queue_init(struct vc4_device *device,
              struct vc4_queue *queue,
              uint32_t queue_family_index,
              int idx,
              VkDeviceQueueCreateFlags flags)
{
   set_loader_magic_value(queue);

   queue->device = device;
   queue->flags = flags;
   queue->queue_family_index = queue_family_index;
   queue->queue_idx = idx;
   /* Shares the device-wide syncobj created in vc4_CreateDevice. */
   queue->job_syncobj = device->last_job_sync;

   return VK_SUCCESS;
}

/* Look up a device extension by name in the generated extension table.
 * Returns the table index, or -1 when the name is not known. */
static int
vc4_get_device_extension_index(const char *name)
{
   int found = -1;

   for (unsigned i = 0; found < 0 && i < VC4_DEVICE_EXTENSION_COUNT; ++i) {
      if (!strcmp(vc4_device_extensions[i].extensionName, name))
         found = (int)i;
   }

   return found;
}

/* Query a DRM_VC4_PARAM_* capability from the kernel.  Returns false when
 * the ioctl fails or the kernel reports the feature as absent. */
static bool
vc4_has_feature(struct vc4_device *device, uint32_t feature)
{
   struct drm_vc4_get_param p = { .param = feature };

   if (drmIoctl(device->fd, DRM_IOCTL_VC4_GET_PARAM, &p) != 0)
      return false;

   return p.value != 0;
}

VkResult
vc4_CreateDevice(VkPhysicalDevice physicalDevice,
                 const VkDeviceCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkDevice *pDevice)
{
	VC4_FROM_HANDLE(vc4_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct vc4_device *device;

	/* Check enabled features */
	if (pCreateInfo->pEnabledFeatures) {
		VkPhysicalDeviceFeatures supported_features;
		vc4_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
		unsigned num_features =
			sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
		for (uint32_t i = 0; i < num_features; i++)
		{
			if (enabled_feature[i] && !supported_feature[i])
				return vk_error(physical_device->instance,
								VK_ERROR_FEATURE_NOT_PRESENT);
		}
	}

	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
						sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_device_init(&device->vk, pCreateInfo,
			&physical_device->instance->alloc, pAllocator);

	device->instance = physical_device->instance;
	device->physical_device = physical_device;
	device->_lost = false;

	device->fd = physical_device->local_fd;

	uint64_t syncobj_cap = 0;
   if (drmGetCap(device->fd, DRM_CAP_SYNCOBJ, &syncobj_cap) == 0 && syncobj_cap) {
      device->has_syncobj = true;
      int ret = drmSyncobjCreate(device->fd,
                                 DRM_SYNCOBJ_CREATE_SIGNALED,
                                 &device->last_job_sync);
      if (ret) {
         fprintf(stderr, "create syncobj fail %s %d\n", __func__, __LINE__);
         assert(ret == 0);
      }
   }

   pthread_mutex_init(&device->mutex, NULL);

   for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
	const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
	int index = vc4_get_device_extension_index(ext_name);
	if (index < 0 ||
			!physical_device->supported_extensions.extensions[index]) {
		vk_free(&device->vk.alloc, device);
		return vk_error(physical_device->instance,
						VK_ERROR_EXTENSION_NOT_PRESENT);
	}

		device->enabled_extensions.extensions[index] = true;
	}

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create =
			&pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;
		device->queues[qfi] = vk_alloc(
			&device->vk.alloc, queue_create->queueCount * sizeof(struct vc4_queue),
			8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail_queues;
		}

		memset(device->queues[qfi], 0,
				queue_create->queueCount * sizeof(struct vc4_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = vc4_queue_init(device, &device->queues[qfi][q], qfi, q,
									queue_create->flags);
			if (result != VK_SUCCESS)
				goto fail_queues;
		}
	}

   device->has_control_flow =
       vc4_has_feature(device, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
   // device->has_etc1 =
   //     vc4_has_feature(device, DRM_VC4_PARAM_SUPPORTS_ETC1);
   device->has_threaded_fs =
       vc4_has_feature(device, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
   // device->has_madvise =
   //     vc4_has_feature(device, DRM_VC4_PARAM_SUPPORTS_MADVISE);
   // device->has_perfmon_ioctl =
   //     vc4_has_feature(device, DRM_VC4_PARAM_SUPPORTS_PERFMON);

   *pDevice = vc4_device_to_handle(device);
	return VK_SUCCESS;

//TODO:
fail_queues:
    return result;
}

/* Per-queue teardown hook called from vc4_DestroyDevice.  Currently a
 * no-op: vc4_queue_init only stores plain fields plus a syncobj handle
 * that appears to be owned by the device (device->last_job_sync) —
 * confirm ownership before releasing anything here. */
static void
vc4_queue_finish(struct vc4_queue *queue)
{
   //TODO:
}

/* vkDestroyDevice: finish every queue, free the per-family queue arrays,
 * then finish and free the device.
 *
 * Fix: vk_device_finish() was previously called inside the family loop —
 * once per family that had queues — so it could run multiple times, or
 * never if no queues existed.  It must run exactly once. */
void
vc4_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   /* The spec allows destroying VK_NULL_HANDLE. */
   if (!device)
      return;

   for (unsigned i = 0; i < VC4_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         vc4_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

/* vkCreateBuffer: allocate the host-side buffer object and record the
 * create parameters.  No GPU memory is attached here. */
VkResult
vc4_CreateBuffer(VkDevice _device,
                const VkBufferCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkBuffer *pBuffer)
{
    VC4_FROM_HANDLE(vc4_device, device, _device);

    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

    struct vc4_buffer *buffer =
        vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (buffer == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    buffer->flags = pCreateInfo->flags;
    buffer->usage = pCreateInfo->usage;
    buffer->size = pCreateInfo->size;

    *pBuffer = vc4_buffer_to_handle(buffer);

    return VK_SUCCESS;
}

/* vkDestroyBuffer: free the host-side buffer object. */
void vc4_DestroyBuffer(VkDevice _device,
                      VkBuffer _buffer,
                      const VkAllocationCallbacks *pAllocator)
{
    VC4_FROM_HANDLE(vc4_device, device, _device);
    VC4_FROM_HANDLE(vc4_buffer, buffer, _buffer);

    /* Destroying VK_NULL_HANDLE is a valid no-op per the Vulkan spec. */
    if (buffer == NULL)
        return;

    vk_free2(&device->vk.alloc, pAllocator, buffer);
}

static void
vc4_init_sampler(struct vc4_device *device,
                struct vc4_sampler *sampler,
                const VkSamplerCreateInfo *pCreateInfo)
{
   /* Record the creation parameters verbatim for later use.
    * NOTE(review): this is a shallow struct copy — create_info.pNext will
    * still point at caller-owned memory and may dangle after this call;
    * confirm nothing dereferences it later. */
   sampler->create_info = *pCreateInfo;
}

/* Create a VkSampler: allocate the object and stash the creation
 * parameters; no hardware state is programmed here. */
VkResult
vc4_CreateSampler(VkDevice _device,
                 const VkSamplerCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkSampler *pSampler)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   struct vc4_sampler *sampler =
      vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
                      VK_OBJECT_TYPE_SAMPLER);
   if (sampler == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vc4_init_sampler(device, sampler, pCreateInfo);

   *pSampler = vc4_sampler_to_handle(sampler);
   return VK_SUCCESS;
}

/* Destroy a VkSampler.  The spec allows sampler == VK_NULL_HANDLE, and
 * vk_object_free is not NULL-safe, so guard like the other Destroy*
 * entry points in this file do. */
void
vc4_DestroySampler(VkDevice _device,
                  VkSampler _sampler,
                  const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_object_free(&device->vk, pAllocator, sampler);
}

/* Allocate a VkDeviceMemory object backed by a vc4 BO.
 *
 * A zero-sized allocation is legal and yields VK_NULL_HANDLE.  Importing
 * an external fd (VkImportMemoryFdInfoKHR) is not implemented yet and is
 * rejected explicitly — the old code fell through that branch with
 * 'result' uninitialized, which is undefined behavior. */
static VkResult
vc4_alloc_memory(struct vc4_device *device,
                const VkMemoryAllocateInfo *pAllocateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkDeviceMemory *pMem)
{
   struct vc4_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   if (pAllocateInfo->allocationSize == 0) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* A handleType of 0 means "no import requested". */
   const VkImportMemoryFdInfoKHR *fd_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   if (fd_info && !fd_info->handleType)
      fd_info = NULL;

   if (fd_info) {
      /* TODO: implement dma-buf / opaque-fd import (needs a per-instance
       * handle-to-bo table with reference counting, since importing the
       * same fd twice returns the same GEM handle).  Until then, fail
       * cleanly instead of using an uninitialized result. */
      result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      result = vc4_vk_bo_init_alloc(device, &mem->bo,
                                    (uint32_t)pAllocateInfo->allocationSize);
   }

   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, pAllocator, mem);
      return result;
   }

   mem->size = pAllocateInfo->allocationSize;
   mem->type_index = pAllocateInfo->memoryTypeIndex;

   *pMem = vc4_device_memory_to_handle(mem);

   return VK_SUCCESS;
}

/* Public vkAllocateMemory entry point; the real work happens in
 * vc4_alloc_memory(). */
VkResult
vc4_AllocateMemory(VkDevice _device,
                  const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkDeviceMemory *pMem)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   return vc4_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
}

/* Free a VkDeviceMemory: release the backing BO, then the host wrapper.
 * Freeing VK_NULL_HANDLE is a no-op. */
void
vc4_FreeMemory(VkDevice _device,
              VkDeviceMemory _mem,
              const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_device_memory, mem, _mem);

   if (!mem)
      return;

   /* BO first, wrapper second — the BO lives inside the wrapper. */
   vc4_vk_bo_free_mem(device, &mem->bo);
   vk_free2(&device->vk.alloc, pAllocator, mem);
}

VkResult
vc4_FlushMappedMemoryRanges(VkDevice _device,
                            uint32_t memoryRangeCount,
                            const VkMappedMemoryRange *pMemoryRanges)
{
   /* No-op: presumably all mappable memory is host-coherent so there is
    * nothing to flush — TODO confirm against the advertised memory types. */
   return VK_SUCCESS;
}

VkResult
vc4_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   /* No-op: presumably all mappable memory is host-coherent so there is
    * nothing to invalidate — TODO confirm against the advertised memory
    * types. */
   return VK_SUCCESS;
}

/* Report the memory requirements of an image; size and alignment were
 * computed when the image was created. */
void
vc4_GetImageMemoryRequirements(VkDevice _device,
                              VkImage _image,
                              VkMemoryRequirements *pMemoryRequirements)
{
   VC4_FROM_HANDLE(vc4_image, image, _image);

   pMemoryRequirements->size = image->size;
   pMemoryRequirements->alignment = image->alignment;
   /* NOTE(review): images advertise memory types 0 and 1 (mask 0x03) while
    * buffers advertise only type 0 — confirm the asymmetry is intended. */
   pMemoryRequirements->memoryTypeBits = 0x03;
}

void
vc4_GetImageMemoryRequirements2(VkDevice device,
                               const VkImageMemoryRequirementsInfo2 *pInfo,
                               VkMemoryRequirements2 *pMemoryRequirements)
{
   /* Forward to the core-1.0 query.  Extension structs on the pNext
    * chains (e.g. VkMemoryDedicatedRequirements) are not examined or
    * filled in. */
   vc4_GetImageMemoryRequirements(device, pInfo->image,
                                 &pMemoryRequirements->memoryRequirements);
}

static VkResult
device_map(struct vc4_device *device, struct vc4_device_memory *mem)
{
   assert(mem);

   /* From the spec:
    *
    *   "After a successful call to vkMapMemory the memory object memory is
    *   considered to be currently host mapped. It is an application error to
    *   call vkMapMemory on a memory object that is already host mapped."
    *
    * We are not concerned with this ourselves (validation layers should
    * catch these errors and warn users), however, the driver may internally
    * map things (for example for debug CLIF dumps or some CPU-side operations)
    * so by the time the user calls here the buffer might already been mapped
    * internally by the driver.
    */
   if (mem->bo.map) {
      // assert(mem->bo.map_size == mem->bo.size);
      return VK_SUCCESS;
   }

   bool ok = vc4_bo_map(device, &mem->bo);
   if (!ok)
      return VK_ERROR_MEMORY_MAP_FAILED;

   return VK_SUCCESS;
}

static void
device_unmap(struct vc4_device *device, struct vc4_device_memory *mem)
{
   /* Undo device_map(); the BO must currently be mapped. */
   assert(mem && mem->bo.map);
   vc4_bo_unmap(device, &mem->bo);
}

/* Map device memory into the host address space.  The whole BO is always
 * mapped and the caller's offset is applied to the base pointer; the
 * 'size' and 'flags' arguments are not used. */
VkResult
vc4_MapMemory(VkDevice _device,
               VkDeviceMemory _memory,
               VkDeviceSize offset,
               VkDeviceSize size,
               VkMemoryMapFlags flags,
               void **ppData)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_device_memory, mem, _memory);

   /* Mapping a VK_NULL_HANDLE (zero-sized) allocation yields NULL. */
   if (!mem) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   assert(offset < mem->bo.size);

   /* The range requested by the user and any internal driver mapping may
    * differ, so map the entire BO and offset into it afterwards. */
   VkResult result = device_map(device, mem);
   if (result != VK_SUCCESS)
      return vk_error(device->instance, result);

   *ppData = (uint8_t *)mem->bo.map + offset;
   return VK_SUCCESS;
}

/* Unmap previously mapped device memory.  Unmapping VK_NULL_HANDLE is a
 * no-op. */
void
vc4_UnmapMemory(VkDevice _device,
                 VkDeviceMemory _memory)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_device_memory, mem, _memory);

   if (!mem)
      return;

   device_unmap(device, mem);
}

static int
vc4_bo_export_dmabuf(struct vc4_device *dev, struct vc4_bo *bo)
{
   /* Export the BO's GEM handle as a dma-buf; returns the new fd, or a
    * negative value on failure. */
   return vc4_gem_export_dmabuf(dev, bo->handle);
}

/* Export a VkDeviceMemory allocation as a file descriptor (opaque-fd or
 * dma-buf handle types only).  Ownership of the fd passes to the caller. */
VkResult
vc4_GetMemoryFdKHR(VkDevice _device,
                  const VkMemoryGetFdInfoKHR *pGetFdInfo,
                  int *pFd)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   int fd = vc4_bo_export_dmabuf(device, &memory->bo);
   if (fd < 0)
      return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   *pFd = fd;
   return VK_SUCCESS;
}

/* Report buffer memory requirements: fixed 1 KiB alignment, size rounded
 * up to that alignment.
 * NOTE(review): buffers advertise only memory type 0 while images use
 * mask 0x03 — confirm the asymmetry is intended. */
void
vc4_GetBufferMemoryRequirements(VkDevice _device,
                               VkBuffer _buffer,
                               VkMemoryRequirements *pMemoryRequirements)
{
   VC4_FROM_HANDLE(vc4_buffer, buffer, _buffer);

   const VkDeviceSize buf_align = 1024;
   pMemoryRequirements->memoryTypeBits = 1;
   pMemoryRequirements->alignment = buf_align;
   pMemoryRequirements->size = align64(buffer->size, buf_align);
}

/* Bind buffers to device memory.  Binding only records the BO pointer and
 * offset on the buffer; no GPU-side work is involved. */
VkResult
vc4_BindBufferMemory2(VkDevice device,
                     uint32_t bindInfoCount,
                     const VkBindBufferMemoryInfo *pBindInfos)
{
   for (uint32_t idx = 0; idx < bindInfoCount; idx++) {
      const VkBindBufferMemoryInfo *info = &pBindInfos[idx];
      VC4_FROM_HANDLE(vc4_device_memory, mem, info->memory);
      VC4_FROM_HANDLE(vc4_buffer, buffer, info->buffer);

      if (mem == NULL) {
         buffer->bo = NULL;
         continue;
      }

      buffer->bo = &mem->bo;
      buffer->bo_offset = info->memoryOffset;
   }
   return VK_SUCCESS;
}

/* Legacy single-bind entry point, routed through the *2 variant. */
VkResult
vc4_BindBufferMemory(VkDevice device,
                    VkBuffer buffer,
                    VkDeviceMemory memory,
                    VkDeviceSize memoryOffset)
{
   VkBindBufferMemoryInfo info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = buffer,
      .memory = memory,
      .memoryOffset = memoryOffset,
   };

   return vc4_BindBufferMemory2(device, 1, &info);
}

/* Create a framebuffer.  The attachment view pointers live in a trailing
 * array, so the struct and the array are allocated as one block. */
VkResult
vc4_CreateFramebuffer(VkDevice _device,
                       const VkFramebufferCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkFramebuffer *pFramebuffer)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   const uint32_t att_count = pCreateInfo->attachmentCount;
   struct vc4_framebuffer *fb =
      vk_alloc2(&device->vk.alloc, pAllocator,
                sizeof(*fb) + sizeof(struct vc4_image_view *) * att_count, 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (fb == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   fb->width = pCreateInfo->width;
   fb->height = pCreateInfo->height;
   fb->layers = pCreateInfo->layers;
   fb->attachment_count = att_count;
   fb->color_attachment_count = 0;

   for (uint32_t i = 0; i < att_count; i++) {
      struct vc4_image_view *view =
         vc4_image_view_from_handle(pCreateInfo->pAttachments[i]);
      fb->attachments[i] = view;
      if (view->aspects & VK_IMAGE_ASPECT_COLOR_BIT)
         fb->color_attachment_count++;
   }

   *pFramebuffer = vc4_framebuffer_to_handle(fb);

   return VK_SUCCESS;
}

/* Free a framebuffer (the attachment views are owned by the caller and
 * are not destroyed here).  Destroying VK_NULL_HANDLE is a no-op. */
void
vc4_DestroyFramebuffer(VkDevice _device,
                        VkFramebuffer _fb,
                        const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_framebuffer, fb, _fb);

   if (fb == NULL)
      return;

   vk_free2(&device->vk.alloc, pAllocator, fb);
}

/* Create a binary semaphore backed by a DRM syncobj. */
VkResult
vc4_CreateSemaphore(VkDevice _device,
                     const VkSemaphoreCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkSemaphore *pSemaphore)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);

   struct vc4_semaphore *sem =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sem), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (sem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* No exported sync-file fd yet. */
   sem->fd = -1;

   if (drmSyncobjCreate(device->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
                        &sem->sync) != 0) {
      vk_free2(&device->vk.alloc, pAllocator, sem);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pSemaphore = vc4_semaphore_to_handle(sem);

   return VK_SUCCESS;
}

/* Destroy a semaphore: drop the kernel syncobj and any exported sync-file
 * fd.  Destroying VK_NULL_HANDLE is a no-op. */
void
vc4_DestroySemaphore(VkDevice _device,
                      VkSemaphore semaphore,
                      const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_semaphore, sem, semaphore);

   if (!sem)
      return;

   drmSyncobjDestroy(device->fd, sem->sync);

   if (sem->fd >= 0)
      close(sem->fd);

   vk_free2(&device->vk.alloc, pAllocator, sem);
}

/* Create a VkEvent — just a host-visible flag, no kernel object. */
VkResult
vc4_CreateEvent(VkDevice _device,
                 const VkEventCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkEvent *pEvent)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   struct vc4_event *event =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (event == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Per the spec, a new event starts in the unsignaled state. */
   event->state = false;

   *pEvent = vc4_event_to_handle(event);

   return VK_SUCCESS;
}

/* Free a VkEvent.  Destroying VK_NULL_HANDLE is a no-op. */
void
vc4_DestroyEvent(VkDevice _device,
                  VkEvent _event,
                  const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_event, event, _event);

   if (event == NULL)
      return;

   vk_free2(&device->vk.alloc, pAllocator, event);
}

/* Query the event's flag, which Set/ResetEvent write atomically. */
VkResult
vc4_GetEventStatus(VkDevice _device, VkEvent _event)
{
   VC4_FROM_HANDLE(vc4_event, event, _event);

   if (p_atomic_read(&event->state))
      return VK_EVENT_SET;
   return VK_EVENT_RESET;
}

/* Host-side signal: atomically flip the event flag to "set". */
VkResult
vc4_SetEvent(VkDevice _device, VkEvent _event)
{
   VC4_FROM_HANDLE(vc4_event, event, _event);

   p_atomic_set(&event->state, 1);

   return VK_SUCCESS;
}

/* Host-side reset: atomically flip the event flag to "unset". */
VkResult
vc4_ResetEvent(VkDevice _device, VkEvent _event)
{
   VC4_FROM_HANDLE(vc4_event, event, _event);

   p_atomic_set(&event->state, 0);

   return VK_SUCCESS;
}

/* The single queue family: one queue exposing graphics, compute and
 * transfer.  A (1,1,1) transfer granularity permits transfers at any
 * offset/extent. */
static const VkQueueFamilyProperties vc4_queue_family_properties = {
    .queueFlags =
        VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
    .queueCount = 1,
    .timestampValidBits = 48,
    .minImageTransferGranularity = {1, 1, 1},
};

void vc4_GetPhysicalDeviceQueueFamilyProperties(
    VkPhysicalDevice physicalDevice,
    uint32_t *pQueueFamilyPropertyCount,
    VkQueueFamilyProperties *pQueueFamilyProperties)
{
    /* Standard two-call idiom: VK_OUTARRAY_MAKE handles the NULL-array
     * count query; the single entry is the one queue family. */
    VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);

    vk_outarray_append(&out, p) { *p = vc4_queue_family_properties; }
}

void vc4_GetPhysicalDeviceQueueFamilyProperties2(
    VkPhysicalDevice physicalDevice,
    uint32_t *pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
    /* Same as the core-1.0 query but wrapped in the *2 struct; extension
     * structs on each entry's pNext chain are not filled in. */
    VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);

    vk_outarray_append(&out, p)
    {
        p->queueFamilyProperties = vc4_queue_family_properties;
    }
}

/* Block until all work submitted to the queue has completed. */
VkResult
vc4_QueueWaitIdle(VkQueue _queue)
{
   VC4_FROM_HANDLE(vc4_queue, queue, _queue);

   /* Wait on the queue's job syncobj.  The previous timeout of 0 only
    * polled, so drmSyncobjWait returned -ETIME whenever work was still in
    * flight and the function never actually waited; use an effectively
    * infinite timeout instead. */
   int ret = drmSyncobjWait(queue->device->fd, &queue->job_syncobj, 1,
                            INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
                            NULL);
   if (ret) {
      fprintf(stderr, "Wait fence error %s %d, ret = %d\n", __func__, __LINE__, ret);
   }
   assert(ret == 0);

   return VK_SUCCESS;
}

/* Wait for every queue of every family to drain, mirroring the iteration
 * vc4_DestroyDevice performs over device->queues.  Replaces the previous
 * empty stub. */
VkResult
vc4_DeviceWaitIdle(VkDevice _device)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   for (unsigned i = 0; i < VC4_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++) {
         /* assumes the vc4_queue_to_handle cast helper exists (same
          * handle-macro family as VC4_FROM_HANDLE) — verify. */
         vc4_QueueWaitIdle(vc4_queue_to_handle(&device->queues[i][q]));
      }
   }
   return VK_SUCCESS;
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

/* Negotiate the loader/ICD interface version: clamp the loader's proposal
 * to the highest version this driver implements (v3). */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it
    *         is linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to
    *         ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
    *          because the loader no longer does so.
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
   return VK_SUCCESS;
}

PFN_vkVoidFunction
vc4_GetInstanceProcAddr(VkInstance _instance, const char *pName);

/* Resolve an instance-level (or global-level) entry point by name,
 * filtered by the instance's API version and enabled extensions. */
PFN_vkVoidFunction
vc4_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
    VC4_FROM_HANDLE(vc4_instance, instance, _instance);

    /* instance may be NULL when the loader queries global entry points;
     * in that case pass 0 / NULL so only unfiltered lookups succeed. */
    if (instance == NULL)
        return vc4_lookup_entrypoint_checked(pName, 0, NULL, NULL);

    return vc4_lookup_entrypoint_checked(pName, instance->api_version,
                                         &instance->enabled_extensions, NULL);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName);

/* Forward declaration; definition follows below. */
PFN_vkVoidFunction
vc4_GetDeviceProcAddr(VkDevice _device, const char *pName);

/* Loader-facing alias that simply forwards to vc4_GetInstanceProcAddr. */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
    return vc4_GetInstanceProcAddr(instance, pName);
}

/* Resolve a device-level entry point by name.  Lookups are filtered by
 * both the instance's and the device's enabled extension sets. */
PFN_vkVoidFunction
vc4_GetDeviceProcAddr(VkDevice _device, const char *pName)
{
    VC4_FROM_HANDLE(vc4_device, device, _device);

    return vc4_lookup_entrypoint_checked(pName,
                                         device->instance->api_version,
                                         &device->instance->enabled_extensions,
                                         &device->enabled_extensions);
}
